//do ...\MakeAttribute_struct_demo_BLP_robustd_choiceset_2008_2012_v11_heter_inc_set_seed_bs_11022017.do


// Prepare matlab estimation files for different income groups: 1 to 6
// This script takes a random sample from the raw data and create the files that
// are used by the matlab scripts.

//Script needed to create the price time series:
//do ...\MakePriceTS_11022017.do


clear all
set more off
set mem 10000m
pause on


global pathname="raw_data_folder"
global censuspath="raw_data_folder"
global sales_tax_path="raw_data_folder"

local subsample=`"`1'"'               //Size of the subsample: 44000
local group_id_selected=`"`2'"' 	  //Income category to select: 1, 2, 3, 4, 5, or 6
local do_preprocess=`"`3'"'           //0-1: Merge yearly dataset
local do_price_ts=`"`4'"'             //0-1: Create price time series
local set_seed=`"`5'"'                //Chose a seed for the random sample so it can be replicated

// Fix the seed for replication purpose
set seed `set_seed' 

/*
Those are the definition of the different group id used.
In this project, we use group_id=1, 2, 3, 4, 5, 6, or 16 (all income groups)
 
//	   gen group_id=0
//     replace group_id=1 if Dhd_income_1==1
//     replace group_id=2 if Dhd_income_2==1
//     replace group_id=3 if Dhd_income_3==1
//     replace group_id=4 if Dhd_income_4==1
//     replace group_id=5 if Dhd_income_5==1
//     replace group_id=6 if Dhd_income_6==1
//     replace group_id=7 if Dhd_renter_1==1
//     replace group_id=8 if Dhd_renter_2==1
//     replace group_id=9 if Dnonhd_tert1==1
//     replace group_id=10 if Dnonhd_tert2==1
//     replace group_id=11 if Dnonhd_tert3==1
//     replace group_id=12 if Dhd_miss_demo==1
//     replace group_id=13 if Dcontractor_tert1==1
//     replace group_id=14 if Dcontractor_tert2==1
//     replace group_id=15 if Dcontractor_tert3==1
    replace group_id=16 if Dhd==1 & miss_demo==0 & rent!="R"
//    replace group_id=17 if Dhd==1 & rent!="R"
    replace group_id=18 if Dhd==0 & miss_demo==0 
//    replace group_id=19 if Dhd==0 
//    replace group_id=20 if miss_demo==0 
*/

local sales_pid_cutoff=min(max(ceil(`subsample'*0.0005),5),25) 

local ranksample=1

// Create a time series of price for each product in the sample
if `do_price_ts'==1 {

	forvalues year_p=2008(1)2012 {
	
	 do "...\MakePricesTS_11022017.do" `year_p' 
	
 }
}


if `do_preprocess'==1 {

forvalues year_p=2008(1)2012 {

use $pathname\lcidemo_046_jan`year_p'_dec`year_p', clear
	drop if state=="PR" | state=="VI" | state=="GU"
	keep if year==`year_p'
    drop if online==1
    keep if store=="A" | store=="B"
    sort pid week
	merge pid week using  $pathname\attributes_`year_p'_weekly
	keep if _m==3
	drop _m
    
	drop if retail_p<280
    drop if retail_p>4100
    
    sum retail
    
    gen bad_pid=0
	replace bad_pid=1 if type_id==0 | type_id==.
	replace bad_pid=1 if size_id==0 | size_id==.
	replace bad_pid=1 if standard_class=="unknown"	
	drop if bad_pid==1 
	sort state zip 
	
	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))

//Select households that made one purchase, live in single family housing and do not rent.		
	/*
	drop if o_qty>1 & o_qty!=. 
	//drop if housing==""
	drop if housing=="M"
	drop if rent=="R"
    */
//demographic information			
	destring age adult children income education, replace
	/*
	drop if income==.
	drop if age==.
	drop if adult==.
	drop if children==.
	drop if education==.
	*/
	//drop if education==4
    replace education=1 if education==4
	gen income_sub=cond(income<=3,1,cond(income>=4 & income<=6,2,3))
	gen income_tert=cond(income<=5,1,cond(income>=6 & income<=7,2,3))
	gen income_six=cond(income<=3,1,cond(income>=4 & income<=5,2,cond(income==6,3,cond(income==7,4,cond(income==8,5,6)))))
	replace income_sub=.  if income==.
	replace income_tert=.  if income==.
	replace income_six=.  if income==.
	
    //drop if political==""
    gen political_id=cond(political=="R",1,cond(political=="D",2,3))
	capture destring hd_id, replace
	//drop if hd_id==.
	drop v1* 
save $pathname\lcidemo_all_046_jan`year_p'_dec`year_p'_struct_nocensor, replace
}



use $pathname\lcidemo_all_046_jan2008_dec2008_struct_nocensor, replace
	append using $pathname\lcidemo_all_046_jan2009_dec2009_struct_nocensor
	append using $pathname\lcidemo_all_046_jan2010_dec2010_struct_nocensor
    append using $pathname\lcidemo_all_046_jan2011_dec2011_struct_nocensor
	append using $pathname\lcidemo_all_046_jan2012_dec2012_struct_nocensor

	replace tri=tri+3 if year==2009
    replace tri=tri+6 if year==2010
    replace tri=tri+9 if year==2011
    replace tri=tri+12 if year==2012
    
    
    replace month=month+12 if year==2009
    replace month=month+24 if year==2010
    replace month=month+36 if year==2011
    replace month=month+48 if year==2012
    
    gen week_num=week
    replace week_num=week_num+52  if year==2009
    replace week_num=week_num+104 if year==2010
    replace week_num=week_num+156 if year==2011
    replace week_num=week_num+208 if year==2012
    
//Construction of a robust choice set
   
    //For 2008, we are being conservative and drop observations if the pid disappeared during the second trimester 
	sort pid week_num
	by pid: egen max_week=max(week_num)
	drop if max_week<31 & trimester==2 & year==2008
	
/*	
    sort pid zipcode
	by pid zipcode: egen max_month=max(month)
	by pid zipcode: egen max_week_num=max(week_num)
	by pid: egen min_max_month=min(max_month)
*/
		/*
	. tab max_month if delisted==1 & delisted2010!=1

  max_month |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      8,577        8.26        8.26
          2 |      9,864        9.49       17.75
          3 |     18,050       17.37       35.12
          4 |      9,349        9.00       44.12
          5 |      8,842        8.51       52.64
          6 |      3,019        2.91       55.54
          7 |      2,399        2.31       57.85
          8 |      2,478        2.39       60.24
          9 |      1,803        1.74       61.97
         10 |         87        0.08       62.05
         11 |      2,031        1.95       64.01
         12 |      1,880        1.81       65.82
         13 |      1,812        1.74       67.56
         14 |      1,257        1.21       68.77
         15 |      1,623        1.56       70.34
         16 |      1,340        1.29       71.63
         17 |      1,869        1.80       73.42
         18 |      1,858        1.79       75.21
         19 |      2,591        2.49       77.71
         20 |      2,900        2.79       80.50
         21 |      2,598        2.50       83.00
         22 |      2,770        2.67       85.67
         23 |      4,228        4.07       89.74
         24 |      4,250        4.09       93.83
         25 |      1,829        1.76       95.59
         26 |        631        0.61       96.19
         27 |        589        0.57       96.76
         28 |        896        0.86       97.62
         29 |        894        0.86       98.48
         30 |      1,059        1.02       99.50
         31 |        169        0.16       99.67
         32 |         91        0.09       99.75
         33 |         40        0.04       99.79
         34 |         14        0.01       99.81
         35 |         26        0.03       99.83
         36 |          6        0.01       99.84
         37 |         19        0.02       99.85
         38 |         11        0.01       99.87
         39 |         12        0.01       99.88
         40 |         11        0.01       99.89
         41 |         11        0.01       99.90
         42 |         20        0.02       99.92
         43 |         16        0.02       99.93
         44 |         16        0.02       99.95
         45 |         21        0.02       99.97
         46 |          6        0.01       99.97
         47 |          8        0.01       99.98
         48 |         19        0.02      100.00
------------+-----------------------------------
      Total |    103,889      100.00
	
	tab max_month if delisted2010==1
			 max_month |      Freq.     Percent        Cum.
		------------+-----------------------------------
		         25 |        652        7.33        7.33
		         26 |        852        9.58       16.91
		         27 |      1,399       15.73       32.65
		         28 |      3,044       34.23       66.88
		         29 |      2,402       27.01       93.89
		         30 |        313        3.52       97.41
		         31 |        113        1.27       98.68
		         32 |         46        0.52       99.20
		         33 |         28        0.31       99.52
		         34 |         23        0.26       99.78
		         35 |          5        0.06       99.83
		         36 |          5        0.06       99.89
		         38 |          5        0.06       99.94
		         39 |          5        0.06      100.00
		------------+-----------------------------------
		      Total |      8,892      100.00
	*/
	//drop if month>=max_month & delisted2010==1
	
	drop if month>=30 & delisted2010==1
    drop if month>=25 & delisted==1
	
    sort pid zipcode
	by pid zipcode: egen max_month=max(month)
	by pid zipcode: egen max_week_num=max(week_num)
	by pid: egen min_max_month=min(max_month)
    
	/*
	//Select households that made only one purchase during the
	//whole sample period: 2008-2011  
    by hd_id, sort: egen nb_purchase=count(count)
    tab nb_purchase
    drop if nb_purchase>1
    */
    
    //Create identifiers for the various demographic groups used for the rebate paper #2 and EEgap paper
    
    //Household identifier
    by hd_id, sort: egen nb_purchase=count(count)
    tab nb_purchase
    mvencode o_qty,mv(1) over
    by hd_id, sort: egen total_qty=sum(o_qty) 
    tab total_qty
    gen Dhd=1
    replace Dhd=0 if total_qty>1 | nb_purchase>1
    
    gen Dcontractor=0
    replace Dcontractor=1 if (total_qty>2 | nb_purchase>2) & total_qty!=.  & nb_purchase!=.   
    
    gen miss_demo=0
    replace miss_demo=1 if income==. | age==. | adult==. | children==. | education==.
    
    gen Dhd_income_1=0
    gen Dhd_income_2=0
    gen Dhd_income_3=0
    gen Dhd_income_4=0	
    gen Dhd_income_5=0
    gen Dhd_income_6=0
    
    replace Dhd_income_1=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==1
  	replace Dhd_income_2=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==2
  	replace Dhd_income_3=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==3
  	replace Dhd_income_4=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==4
  	replace Dhd_income_5=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==5
  	replace Dhd_income_6=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_six==6
  	
  	gen Dhd_income_tert1=0
    gen Dhd_income_tert2=0
    gen Dhd_income_tert3=0
  	replace Dhd_income_tert1=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_tert==1
  	replace Dhd_income_tert2=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_tert==2
  	replace Dhd_income_tert3=1 if Dhd==1 & miss_demo==0 & rent!="R" & income_tert==3
  	
  	gen Dhd_renter_1=0
  	gen Dhd_renter_2=0
    replace Dhd_renter_1=1 if Dhd==1 & miss_demo==0 & rent=="R" & income<6
    replace Dhd_renter_2=1 if Dhd==1 & miss_demo==0 & rent=="R" & income>=6
    
    
    gen Dhd_miss_demo=0
    replace Dhd_miss_demo=1 if Dhd==1 & miss_demo==1
    
    
    gen Dnonhd_tert1=0
    gen Dnonhd_tert2=0
    gen Dnonhd_tert3=0
    replace Dnonhd_tert1=1 if Dhd==0 & miss_demo==0 & rent!="R" & income_tert==1
    replace Dnonhd_tert2=1 if Dhd==0 & miss_demo==0 & rent!="R" & income_tert==2
    replace Dnonhd_tert3=1 if Dhd==0 & miss_demo==0 & rent!="R" & income_tert==3
    
    
    gen Dcontractor_tert1=0
    gen Dcontractor_tert2=0
    gen Dcontractor_tert3=0
    replace Dcontractor_tert1=1 if Dcontractor==1 & miss_demo==0 & rent!="R" & income_tert==1
    replace Dcontractor_tert2=1 if Dcontractor==1 & miss_demo==0 & rent!="R" & income_tert==2
    replace Dcontractor_tert3=1 if Dcontractor==1 & miss_demo==0 & rent!="R" & income_tert==3
    
    gen group_id=0
    replace group_id=1 if Dhd_income_1==1
    replace group_id=2 if Dhd_income_2==1
    replace group_id=3 if Dhd_income_3==1
    replace group_id=4 if Dhd_income_4==1
    replace group_id=5 if Dhd_income_5==1
    replace group_id=6 if Dhd_income_6==1
//     replace group_id=7 if Dhd_renter_1==1
//     replace group_id=8 if Dhd_renter_2==1
//     replace group_id=9 if Dnonhd_tert1==1
//     replace group_id=10 if Dnonhd_tert2==1
//     replace group_id=11 if Dnonhd_tert3==1
//     replace group_id=12 if Dhd_miss_demo==1
//     replace group_id=13 if Dcontractor_tert1==1
//     replace group_id=14 if Dcontractor_tert2==1
//     replace group_id=15 if Dcontractor_tert3==1
//    replace group_id=16 if Dhd==1 & miss_demo==0 & rent!="R"
    //replace group_id=17 if Dhd==1 & rent!="R"
//    replace group_id=18 if Dhd==0 & miss_demo==0 
    //replace group_id=19 if Dhd==0 
    //replace group_id=20 if miss_demo==0 
  
save $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, replace

////////////////////////////////////////////////////////////////////////////////////////////////////// 
//Store description
//////////////////////////////////////////////////////////////////////////////////////////////////////  

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, clear
	by pid, sort: egen sales_pid=sum(count)		
	collapse(mean) sales_pid,by(state pid zip tri store)
	gen count=1
save $pathname\choiceset_046_2008_2012_struct_v11_11022017_robustb, replace

	collapse(sum) count,by(state zip tri store)
	ren count nb_models
//Discard choice sets with less than 3 models
	drop if nb_models<3
	sort state zip tri store
save $pathname\store_decription_2008_2012_struct_v11_11022017_robustb, replace
   
//Full sample with appropriate choice set
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, clear
	sort state zip tri store
	merge state zip tri store using $pathname\store_decription_2008_2012_struct_v11_11022017_robustb
	tab _m
	keep if _m==3
	drop _m
	
	keep hd_id datenum group_id age adult children income* education state zipcode pid brand_id type_id week month year trimester count standard pcode  political_id

	sort zipcode year tri week
save $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb, replace	

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb, clear
	
	collapse(sum) count,by(trimester pid zipcode year)
	by zipcode trim, sort: egen nb_models=count(count)
    sort zipcode pid trimester year 
save $pathname\choiceset_trimester_zipcode_2008_2012_struct_v11_11022017_robustb, replace
	
//Create a weekly file 
use $pathname\choiceset_trimester_zipcode_2008_2012_struct_v11_11022017_robustb, clear
		
		foreach x in trimester {
			expand 17 if trimester==1 | trimester==2 | trimester==4 | trimester==5 | trimester==7 | trimester==8 | trimester==10 | trimester==11 | trimester==13  | trimester==14
			expand 18 if trimester==3 | trimester==6 | trimester==9 | trimester==12 | trimester==15 
		}

		sort zipcode pid trimester
		by zipcode pid trimester: egen week=seq()
		replace week=week+17 if trimester==2 | trimester==5 | trimester==8 | trimester==11 | trimester==14 
		replace week=week+34 if trimester==3 | trimester==6 |  trimester==9 |  trimester==12 |  trimester==15  
		keep zipcode pid year trimester week
    	sort zipcode pid trimester week year 
save $pathname\choiceset_trimester_week_zipcode_2008_2012_struct_v11_11022017_robustb_nocensor, replace

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Tax Rate
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use  $pathname\tax_rate_estar_avg_month_zip_date_2008_2012, clear
	sort state datenum
	merge state datenum using $sales_tax_path\sales_tax_holiday
	tab _m
	drop if _m==2
	drop _m
	gen Dtax_holiday=(fridge==1 & policy=="sales tax holiday")
	
    keep zip datenum tax_rate_mode_0461 tax_rate_mode_0460 tax_rate_holidays_0461 tax_rate_holidays_0460 Dtax_holiday
	mvencode tax_rate_mode_0461 tax_rate_mode_0460 tax_rate_holidays_0461 tax_rate_holidays_0460 Dtax_holiday, mv(0) over
    order zip datenum tax_rate_mode_0461 tax_rate_mode_0460 tax_rate_holidays_0461 tax_rate_holidays_0460 Dtax_holiday
	
	sort zip datenum
save   $pathname\tax_rate_zipday_046_2008_2012_struct_NoHd, replace 	
 

}

//////////////////////////////////////////////////////////////////////////////////////////////////////    
 
 
use	$pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb, clear
//Random sample 
    keep if group_id==`group_id_selected'
	sort  hd_id	
	
	bsample `subsample'
	
	//Drop pid with low sales
    bys pid: egen sales_pid=sum(count)
	drop if sales_pid<`sales_pid_cutoff'
	sort hd_id datenum pid
	ren hd_id hd_id_original
	egen hd_id=seq()
save $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', replace



//Create Matching Indices
	keep hd_id zipcode pid week year trimester datenum
	sort zipcode year tri week datenum
save $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace  

//Create PID Identifier
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	collapse(sum) count,by(pid)
	ren count sales_pid
	sort sales_pid
 
	egen pid_id=seq()
	sort pid
save $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace	

//Create 2010 Delisted Identifier
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, clear
	sort pid 
	merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	
	bys pid: egen delisted2010_tmp=max(delisted2010)
	drop delisted2010
	ren delisted2010_tmp delisted2010	
	
	bys pid: egen delisted_tmp=max(delisted)
	drop delisted
	ren delisted_tmp delisted	
	
	collapse(mean) delisted delisted2010,by(pid)
	sort pid 
save $pathname\pid_id_delisted_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace	

//Identify last week and month for each pid/zipcode
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, clear
	sort pid 
	merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	
	bys pid: egen delisted2010_tmp=max(delisted2010)
	drop delisted2010
	ren delisted2010_tmp delisted2010	
	
	bys pid: egen delisted_tmp=max(delisted)
	drop delisted
	ren delisted_tmp delisted	
	
	collapse(mean) max_week_num max_month delisted delisted2010,by(pid zipcode)

	sort pid zipcode
save $pathname\pid_id_max_week_month_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace	


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Nb of Models
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb, clear
	collapse(sum) count,by(trimester standard pid zipcode year)
	by standard zipcode trim, sort: egen nb_models=count(count)
	collapse(mean) nb_models,by(trimester pid zipcode year)

    sort pid
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	
	keep nb_models zipcode pid_id trimester year 
	reshape wide nb_models, i(year trimester zip) j(pid_id)
	mvencode nb_models*, mv(0) over
		foreach x in trimester {
			expand 17 if trimester==1 | trimester==2 | trimester==4 | trimester==5 | trimester==7 | trimester==8 | trimester==10 | trimester==11 | trimester==13  | trimester==14 
			expand 18 if trimester==3 | trimester==6 | trimester==9 | trimester==12 | trimester==15 
		}

		sort zipcode trimester
		by zipcode trimester: egen week=seq()
		replace week=week+17 if trimester==2 | trimester==5 | trimester==8 | trimester==11 | trimester==14 
		replace week=week+34 if trimester==3 | trimester==6 |  trimester==9 |  trimester==12 |  trimester==15 
	keep  zipcode year trimester week nb_models*
    sort zipcode year tri week
save $pathname\nbmodels_standards_trimester_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week  using $pathname\nbmodels_standards_trimester_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd 
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\nbmodels_standards_trimester_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Create Choice Set Identifier
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use $pathname\choiceset_trimester_week_zipcode_2008_2012_struct_v11_11022017_robustb_nocensor, clear

		sort pid zipcode
		merge pid zipcode using  $pathname\pid_id_max_week_month_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
		tab _m
		keep if _m==3
		drop _m
		
		drop if week>=22 & year==2010 & delisted2010==1
		drop if  year==2011 & delisted2010==1
		drop if  year==2012 & delisted2010==1
    	drop if  year>2009 & delisted==1
		
		keep zipcode pid year trimester week
    	sort zipcode pid trimester week year 
save $pathname\choiceset_trimester_week_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', replace
 
    	
//Look at stores (zipcode) present
use $pathname\choiceset_trimester_zipcode_2008_2012_struct_v11_11022017_robustb, clear
     
	 sort pid
     merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	 tab _m
	 keep if _m==3
	 drop _m
     
	 collapse(count) count, by(zipcode)
     sort zipcode
     egen zip_id=seq()
     egen maxzip_id=max(zip_id)
   	 levelsof maxzip_id,local(nbzip)
     sort zip_id
save $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', replace


//CHECK SIZE OF THE FILE AND REPLACE EXPAND ZIPCODE BELOW 
display `nbzip'

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb, clear
	collapse(sum) count,by(zipcode state)
	drop count
	sort zipcode
save $pathname\map_zip_to_state_2008_2012_struct, replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Demographics
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	//gen income1=cond(income==1,1,0)
	gen income2=cond(income_tert==5,1,0)	
	gen income3=cond(income_tert==6,1,0)	
	gen education2=cond(education==2,1,0)	
	gen education3=cond(education==3,1,0)	
	//gen housing1=cond(housing==1,1,0)
	//gen rent1=cond(rent==1,1,0)
	gen fam_size=adult+children
	//mvencode income* education* housing* rent* fam_size age, mv(0) over
	//keep hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	//order hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	gen political2=cond(political_id==2,1,0)	
	gen political3=cond(political_id==3,1,0)
	egen mean_fam_size=mean(fam_size)
	egen mean_age=mean(age)	
	replace age=mean_age if age==.
	replace fam_size=mean_fam_size if fam_size==.
	//mvencode income* education* fam_size age, mv(0) over
	keep hd_id income2 income3 education2 education3 fam_size age  political2 political3
	order hd_id income2 income3 education2 education3 fam_size age   political2 political3
	sort hd_id
save $pathname\demo_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	//gen income1=cond(income==1,1,0)
	gen income2=cond(income_tert==5,1,0)	
	gen income3=cond(income_tert==6,1,0)
	gen education2=cond(education==2,1,0)	
	gen education3=cond(education==3,1,0)	
	//gen housing1=cond(housing==1,1,0)
	//gen rent1=cond(rent==1,1,0)
	gen fam_size=adult+children
	//mvencode income* education* housing* rent* fam_size age, mv(0) over
	//keep hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	//order hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	gen political2=cond(political_id==2,1,0)	
	gen political3=cond(political_id==3,1,0)
	gen fam_size2=cond(fam_size>=3,1,0)	
	gen age_2=cond(age>=35 & age<50,1,0)
	gen age_3=cond(age>=50 & age<65,1,0)
	gen age_4=cond(age>=65,1,0)
			
	//mvencode income* education* fam_size age, mv(0) over
	keep hd_id income2 income3 education2 education3 fam_size2 age_2 age_3 age_4 political2 political3
	order hd_id income2 income3 education2 education3 fam_size2 age_2 age_3 age_4 political2 political3
	sort hd_id
save $pathname\demo_cat_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace


use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	//gen income1=cond(income==1,1,0)
	gen income2=cond(income_tert==5,1,0)	
	gen income3=cond(income_tert==6,1,0)
	gen education2=cond(education==2,1,0)	
	gen education3=cond(education==3,1,0)	
	gen education4=cond(education==.,1,0)	
	//gen housing1=cond(housing==1,1,0)
	//gen rent1=cond(rent==1,1,0)
	gen fam_size=adult+children
	gen fam_size2=cond(fam_size>=3 & fam_size!=.,1,0)
	gen fam_size3=cond(fam_size==.,1,0)
	
	//mvencode income* education* housing* rent* fam_size age, mv(0) over
	//keep hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	//order hd_id income2 income3 education2 education3  housing1 rent1 fam_size age 
	gen political2=cond(political_id==2,1,0)	
	gen political3=cond(political_id==3,1,0)
		
	gen age_2=cond(age>=35 & age<50 & age!=.,1,0)
	gen age_3=cond(age>=50 & age!=.,1,0)
	gen age_4=cond(age==.,1,0)
			
	//mvencode income* education* fam_size age, mv(0) over
	keep hd_id income2 income3 education2 education3 education4 fam_size2 fam_size3 age_2 age_3 age_4 political2 political3
	order hd_id income2 income3 education2 education3 education4 fam_size2 fam_size2 age_2 age_3 age_4 political2 political3
	sort hd_id
save $pathname\demo_cat_NA_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//bchoice
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	sort pid
	merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	keep hd_id pid_id 
	duplicates list hd_id
	gen bchoice=1
	reshape wide bchoice,i(hd_id) j(pid_id)
	mvencode bchoice*, mv(0) over
	sort hd_id
save $pathname\bchoice_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace
	
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Size Purchassed
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	sort pid
	merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	keep hd_id pid_id pid
	duplicates list hd_id
	sort pid
	merge pid using  $pathname\create_agg_choice
	tab _m
	keep if _m==3
	drop _m
	keep hd_id overall
	sort hd_id
save $pathname\sizebought_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Size Models
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $pathname\create_agg_choice, clear
	sort pid
	merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m 
	keep pid_id overall
	sort pid_id
save $pathname\sizeoffer_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//ID
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $pathname\choiceset_trimester_week_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', replace
//use $pathname\choiceset_trimester_zipcode_`year'_`subsample'_`group_id_selected'_seed_`set_seed', replace
//use $pathname\choiceset_trimester_zipcode_`year'_`subsample'_`group_id_selected'_seed_`set_seed', clear
	sort pid
	merge pid using  $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	tab _m
	keep if _m==3
	drop _m
	keep year trimester week zip  pid_id 
	fillin zipcode pid_id year trimester
	ren _f Dpresent 
	replace Dpresent=2 if Dpresent==1
	replace Dpresent=1 if Dpresent==0
	replace Dpresent=0 if Dpresent==2
	reshape wide Dpresent, i(year trimester week zip) j(pid_id)
	mvencode Dpresent*, mv(0) over
// 	sort zip tri
// 	merge zip tri using $pathname\store_`subsample'_`group_id_selected'_seed_`set_seed'_struc_`year'
// 	tab _m
	
	
//save $pathname\choiceset_identifier_trimester_zipcode_`year'_`subsample'_`group_id_selected'_seed_`set_seed', replace

//Add weeks
// 	foreach x in trimester {
// 		expand 17 if trimester!=3
// 		expand 18 if trimester==3
// 	}
// 	sort zipcode trimester
// 	by zipcode trimester: egen week=seq()
// 	replace week=week+17 if trimester==2
// 	replace week=week+34 if trimester==3  
	keep  zipcode year trimester week D*
    sort zipcode year tri week
save $pathname\choiceset_identifier_trimester_week_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Prices V2
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

//do H:\Research\sears\estar_scripts\MakePricesTS_11022017.do year_p

use $pathname\lcidemo_046_jan2008_dec2008_week_store_ts_11022017, clear
	keep if year==2008
	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
save $pathname\lcidemo_046_jan2008_dec2008_week_store_ts_11022017_tmp, replace

use $pathname\lcidemo_046_jan2009_dec2009_week_store_ts_11022017, clear
	keep if year==2009
	gen trimester=cond(week>=1 & week<=17,4,cond(week>=18 & week<=34,5,6))
save $pathname\lcidemo_046_jan2009_dec2009_week_store_ts_11022017_tmp, replace

use $pathname\lcidemo_046_jan2010_dec2010_week_store_ts_11022017, clear
	keep if year==2010
	gen trimester=cond(week>=1 & week<=17,7,cond(week>=18 & week<=34,8,9))
save $pathname\lcidemo_046_jan2010_dec2010_week_store_ts_11022017_tmp, replace

use $pathname\lcidemo_046_jan2011_dec2011_week_store_ts_11022017, clear
	keep if year==2011
	gen trimester=cond(week>=1 & week<=17,10,cond(week>=18 & week<=34,11,12))
save $pathname\lcidemo_046_jan2011_dec2011_week_store_ts_11022017_tmp, replace

use $pathname\lcidemo_046_jan2012_dec2012_week_store_ts_11022017, clear
	keep if year==2012
	gen trimester=cond(week>=1 & week<=17,13,cond(week>=18 & week<=34,14,15))
save $pathname\lcidemo_046_jan2012_dec2012_week_store_ts_11022017_tmp, replace

use $pathname\lcidemo_046_jan2012_dec2012_week_store_ts_11022017_tmp, clear
	append using $pathname\lcidemo_046_jan2008_dec2008_week_store_ts_11022017_tmp
	append using $pathname\lcidemo_046_jan2009_dec2009_week_store_ts_11022017_tmp
	append using $pathname\lcidemo_046_jan2010_dec2010_week_store_ts_11022017_tmp
	append using $pathname\lcidemo_046_jan2011_dec2011_week_store_ts_11022017_tmp

// The choice set is infered differently than for 
	
	 //Keep only the pid and zipcode observed in each trimester
	sort zipcode pid trimester year 
	merge zipcode pid trimester year using $pathname\choiceset_trimester_zipcode_2008_2012_struct_v11_11022017_robustb
	tab _m
	keep if _m==3
	drop _m
    //Add additional weeks at the end and beginning of each trimester
	sort zipcode pid trimester week year 
    merge zipcode pid trimester week year  using $pathname\choiceset_trimester_week_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    ren _m merge_choiceset
    
    //Impute prices
    sort pid trimester week state
    
    //I should correct for qty here, but _f prices are robustd to this because they rely on mode.
    
    foreach x in retail_f promo_f{
		by pid trimester week state: egen pid_week_state_`x'=mean(`x')
   	}  
    
  	foreach x in retail_f promo_f{
		by pid trimester week: egen pid_week_`x'=mean(`x')
   	}  
   
  	foreach x in  retail_f promo_f{
		by pid trimester: egen pid_trimester_`x'=mode(`x')
   	} 
   	
    
  	foreach x in  retail_f promo_f{
		by pid: egen tpid_`x'=mean(`x')
   	} 

    replace promo_f=pid_week_state_promo_f if merge_choiceset==2
    replace retail_f=pid_week_state_retail_f if merge_choiceset==2
   	replace promo_f=pid_week_promo_f if merge_choiceset==2
    replace retail_f=pid_week_retail_f if merge_choiceset==2
    replace promo_f=pid_trimester_promo_f if merge_choiceset==2
    replace retail_f=pid_trimester_retail_f if merge_choiceset==2
    replace promo_f=tpid_promo_f if merge_choiceset==2
    replace retail_f=tpid_retail_f if merge_choiceset==2
    
    drop pid_week*
    drop pid_trimester*
    drop tpid*
    
    //We don't drop online before because we use online prices to impute
    drop if online==1
    sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep state zipcode pid pid_id week month year trimester promo_f retail_f 
//save $pathname\price_046_jan`year'_dec`year'_week_store_struct_tmp_`subsample'_`group_id_selected'_seed_`set_seed', replace    

	drop retail_f pid
	collapse(mean) promo_f, by(year trimester week zip pid_id)
	sort pid_id zipcode year week
save $pathname\price_mean_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_vLong, replace    
	
    reshape wide promo_f, i(year trimester week zip) j(pid_id)
    fillin year week zip
    replace trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3)) if trimester==. & year==2008
    replace trimester=cond(week>=1 & week<=17,4,cond(week>=18 & week<=34,5,6)) if trimester==. & year==2009
    replace trimester=cond(week>=1 & week<=17,7,cond(week>=18 & week<=34,8,9)) if trimester==. & year==2010
    replace trimester=cond(week>=1 & week<=17,10,cond(week>=18 & week<=34,11,12)) if trimester==. & year==2011
    replace trimester=cond(week>=1 & week<=17,13,cond(week>=18 & week<=34,14,15)) if trimester==. & year==2012
    tab _f
    drop _f
    mvencode promo*, mv(0) over
    sort zipcode year tri week
save $pathname\price_mean_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace       
    

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Create a yearly attribute file
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    


use  $pathname\attributes_2008_weekly, clear
	keep pid week s_estar kwh mef type_id AV ice_sc brand_id standard_class type_id 
	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	gen year=2008
save  $pathname\attributes_2008_weekly_tri, replace


use  $pathname\attributes_2009_weekly, clear
	keep pid week s_estar kwh mef type_id AV ice_sc brand_id standard_class type_id 
	gen trimester=cond(week>=1 & week<=17,4,cond(week>=18 & week<=34,5,6))
	gen year=2009
save  $pathname\attributes_2009_weekly_tri, replace


use  $pathname\attributes_2010_weekly, clear
	keep pid week s_estar  kwh mef type_id AV ice_sc  brand_id standard_class type_id delisted2010
	gen trimester=cond(week>=1 & week<=17,7,cond(week>=18 & week<=34,8,9))
	gen year=2010
save  $pathname\attributes_2010_weekly_tri, replace 


use  $pathname\attributes_2011_weekly, clear
	keep pid week s_estar  kwh mef type_id AV ice_sc  brand_id standard_class type_id 
	gen trimester=cond(week>=1 & week<=17,10,cond(week>=18 & week<=34,11,12))
	gen year=2011
save  $pathname\attributes_2011_weekly_tri, replace 

use  $pathname\attributes_2012_weekly, clear
	keep pid week s_estar  kwh mef type_id AV ice_sc  brand_id standard_class type_id 
	gen trimester=cond(week>=1 & week<=17,13,cond(week>=18 & week<=34,14,15))
	gen year=2012
save  $pathname\attributes_2012_weekly_tri, replace 

	append using  $pathname\attributes_2008_weekly_tri
	append using  $pathname\attributes_2009_weekly_tri
	append using  $pathname\attributes_2010_weekly_tri
	append using  $pathname\attributes_2011_weekly_tri
	mvencode delisted2010,mv(0) over
save  $pathname\attributes_2008_2012_weekly_tri, replace 

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Energy Star Dummy  
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
   
use  $pathname\attributes_2008_2012_weekly_tri, clear
keep year tri pid week s_estar
// 	keep pid week s_estar 
// 	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
// 	gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id s_estar
    reshape wide s_estar, i(year trimester week) j(pid_id)
    //subsample: 80000
    	//expand 1692 
   	//subsample: 7000
    	//expand 1328
   	//subsample: 20000
    	//expand 1605
    //subsample: 20001 no renters and multi-familly housing
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep zipcode year tri week s_estar*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\estar_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace      
    
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Energy Star KWH standard
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
 
use  $pathname\attributes_2008_2012_weekly_tri, clear
keep year tri pid week mef
// 	keep pid week s_estar 
// 	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
// 	gen year=`year'
	gen estar_kwh=mef
	replace estar_kwh=mef*0.80
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id estar_kwh
    reshape wide estar_kwh, i(year trimester week) j(pid_id)
    //subsample: 80000
    	//expand 1692 
   	//subsample: 7000
    	//expand 1328
   	//subsample: 20000
    	//expand 1605
    //subsample: 20001 no renters and multi-familly housing
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep zipcode year tri week estar_kwh*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\estarkwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace  
   

 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Type_id Dummies
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
   
use  $pathname\attributes_2008_2012_weekly_tri, clear
	keep year tri pid week type_id
	gen type_id2=cond(type_id==2,1,0)
	//gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	//gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id type_id2
    reshape wide type_id2, i(year trimester week) j(pid_id)
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    drop _m
    keep zipcode year tri week type_id2*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\type_id2_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace   

use  $pathname\attributes_2008_2012_weekly_tri, clear
	keep year tri pid week type_id
	gen type_id3=cond(type_id==3,1,0)
	//gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	//gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id type_id3
    reshape wide type_id3, i(year trimester week) j(pid_id)
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    drop _m
    keep zipcode year tri week type_id3*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\type_id3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace   


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Ice Maker Dummy
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
   
use  $pathname\attributes_2008_2012_weekly_tri, clear
	keep year tri pid week ice_sc
	//gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	//gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id ice_sc
    reshape wide ice_sc, i(year trimester week) j(pid_id)
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    drop _m
    keep zipcode year tri week ice_sc*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\ice_sc_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace 



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//AV
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////    
   
use  $pathname\attributes_2008_2012_weekly_tri, clear
keep year tri pid week AV
	//gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	//gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id AV
    reshape wide AV, i(year trimester week) j(pid_id)
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    drop _m
    keep zipcode year tri week AV*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\AV_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Energy Cost
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////   

use  $pathname\attributes_2008_2012_weekly_tri, clear
keep year tri pid week kwh
// 	keep pid week s_estar 
// 	gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
// 	gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id kwh
    reshape wide kwh, i(year trimester week) j(pid_id)
    //subsample: 80000
    	//expand 1692 
   	//subsample: 7000
    	//expand 1328
   	//subsample: 20000
    	//expand 1605
    //subsample: 20001 no renters and multi-familly housing
    	expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep zipcode year tri week kwh*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\kwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace       

use $pathname\kwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, clear
    sort zipcode 
    merge zipcode using $pathname\map_zip_to_state_2008_2012_struct
    tab _m
    keep if _m==3
    drop _m
    sort state year
    merge state year using $pathname\electricity\electricity_price_state_2007_2012
    tab _m	
    tab state if _m==1
    drop if _m==2
    drop _m	
	keep zipcode year tri week p_elec
	replace p_elec=p_elec/100
	mvencode p_elec,mv(0.20) over
	order zipcode year tri week
    sort zipcode year tri week
save $pathname\elec_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace   

use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017, clear
	collapse(count) count,by(zipcode state)
	keep zipcode state
	sort zipcode
save $pathname\map_zip_state_046_2008_2012_struct_tmp, replace

use $pathname\kwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, clear
 	sort zipcode
	merge zipcode using $censuspath\mapping_zip_county_nov99
	tab _m
	drop if _m==2
	drop _m
	drop latitude longitude zip_class poname state county v8 v9
	ren county5 county_utility
 	sort county_utility year
	//merge county_utility year using "$pathname\electricity\county_price_tmp"  
  	merge county_utility year using "$pathname\electricity\county_elec_price_2007_2012"
	tab _m
  	drop if _m==2  
  	drop _m
  	//drop state
  	sort zipcode
  	merge zipcode using $pathname\map_zip_state_046_2008_2012_struct_tmp
  	tab _m
  	drop _m
  	sort state year
    merge state year using $pathname\electricity\electricity_price_state_2007_2012
    tab _m	
    tab state if _m==1
    drop if _m==2
  	replace pcount=p_elec if pcount==.
	sort state
	ren _m merge_eleccounty
	drop p_elec
	ren pcount p_elec
	replace p_elec=p_elec/100
	keep zipcode year tri week p_elec
    sort zipcode year tri week
save $pathname\elec_county_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace  



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Rebates: Utility + CFA
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use $pathname\estar_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, clear  
 	sort zipcode
	merge zipcode using $censuspath\mapping_zip_county_nov99
	tab _m
	drop if _m==2
	drop _m latitude longitude zip_class state poname county v8 v9
	ren county5 county_utility
	
    //Rebate
 	sort county_utility year week
	merge county_utility year week using "$pathname\rebate\DSIRE_rebate_week_county_2007_2013"  
	tab _m
	drop if _m==2
	drop _m
	ren incentive incentive_utility
	mvencode incentive_utility,mv(0) over 
		
	drop state
  	sort zipcode
  	merge zipcode using $pathname\map_zip_state_046_2008_2012_struct_tmp
  	tab _m
  	drop _m
  	
  	collapse(sum) incentive_utility,by(state zipcode year tri week)
  	
	sort state year week
	merge state year week using "$pathname/rebate/Cash4Appliances/cash4appliance_refrigerators_weekly_vf_tmp"
	tab _m
	drop if _m==2
	rename _m merge_rebate
	ren incentive incentive_cfa
	mvencode incentive_cfa,mv(0) over 
	gen amount=incentive_utility+incentive_cfa
		
	sort zipcode year tri week
    by zipcode year tri week: egen dup=count(amount)
	
preserve
	
	keep zipcode year tri week amount
    sort zipcode year tri week
    
save $pathname\rebate_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace  	

restore   
 
preserve

	keep zipcode year tri week incentive_utility
    sort zipcode year tri week
    
save $pathname\rebate_utility_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace  	
    
restore 


	sort state year week
	merge state year week using  "$pathname\rebate\Cash4Appliances\Cash4Appliances_announcement"
	tab _m
	ren _m merge_announce
	gen week_announce_tmp=week if merge_announce==3
	
	replace week_announce=week_announce+52 if year==2009
	replace week_announce=week_announce+104 if year==2010
	replace week_announce=week_announce+156 if year==2011
	replace week_announce=week_announce+208 if year==2012
	
	by state, sort: egen week_announce=max(week_announce_tmp) 

	 gen week_num=week
    replace week_num=week_num+52  if year==2009
    replace week_num=week_num+104 if year==2010
    replace week_num=week_num+156 if year==2011
    replace week_num=week_num+208 if year==2012
    
	sort state
	by state: egen max_incentive=max(incentive_cfa)
	gen Dstate=1
	replace Dstate=0 if max_incentive==0
	sort state merge_rebate week_num
	by state merge_rebate: egen id_w=seq()
	gen st_week_tmp=week_num if id_w==1 & merge_rebate==3
	by state: egen st_week=max(st_week)
	gen week_since=week_num-st_week
	gen period=-1 if week_since<0
	replace period=0 if week_since>=0 & incentive_cfa>0
	replace period=1 if week_since>=0 & incentive_cfa==0 & max_incentive>0
	replace period=-2 if week_since<0 & week_num<week_announce
	
	gen Dperiod_0=cond(period==-2,1,0)
	gen Dperiod_1=cond(period==-1,1,0)
	gen Dperiod_2=cond(period==0,1,0)
	gen Dperiod_3=cond(period==1,1,0)
	
	gen Dperiod_0_inc=Dperiod_0*max_incentive
	gen Dperiod_1_inc=Dperiod_1*max_incentive
	gen Dperiod_2_inc=Dperiod_2*max_incentive
	gen Dperiod_3_inc=Dperiod_3*max_incentive

	sort state period week_num
	by state period: egen id_tmp2=seq()
	gen week_end_tmp=week_since if id_tmp2==1 & period==1 
	sort state
	by state: egen week_end=min(week_end_tmp)
	replace week_since=week_since-week_end if period==1
	
	gen Dduring_cfa=0
	replace Dduring_cfa=1 if period==0 
	gen Dduring_cfa_inc=Dduring_cfa*max_incentive
	
	gen D2Mbefore_cfa=0
	replace D2Mbefore_cfa=1 if (period==-1 & week_since>=-8)
	gen D2Mbefore_cfa_inc=D2Mbefore_cfa*max_incentive
	
	gen D2Mafter_cfa=0
	replace D2Mafter_cfa=1 if (period==1 & week_since<=9)
	gen D2Mafter_cfa_inc=D2Mafter_cfa*max_incentive

	keep zipcode year tri week incentive_cfa Dduring_cfa_inc D2Mbefore_cfa_inc D2Mafter_cfa_inc
    sort zipcode year tri week
    
save $pathname\rebate_cfa_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace  	
    



use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear

//Create Matching Indices
	keep hd_id zipcode pid week year trimester datenum
	sort zipcode year tri week datenum
save $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace  


use $pathname\choiceset_trimester_zipcode_2008_2012_struct_v11_11022017_robustb, clear
     
	 sort pid
     merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
	 tab _m
	 keep if _m==3
	 drop _m
     
	 collapse(count) count, by(zipcode)
     sort zipcode
     egen zip_id=seq()
     egen maxzip_id=max(zip_id)
   	 levelsof maxzip_id,local(nbzip)
     sort zip_id
save $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Type_id_2_3
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use  $pathname\attributes_2008_2012_weekly_tri, clear
	keep year tri pid week type_id
	gen type_id_2_3=cond((type_id==2 | type_id==3),1,0)
	//gen trimester=cond(week>=1 & week<=17,1,cond(week>=18 & week<=34,2,3))
	//gen year=`year'
	sort pid 
    merge pid using $pathname\pid_id_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    keep year trimester week pid_id type_id_2_3
    reshape wide type_id_2_3, i(year trimester week) j(pid_id)
    display `nbzip'
    expand `nbzip'
    sort year trimester week
    by year trimester week: egen zip_id=seq()
    sort zip_id
    merge zip_id using  $pathname\choiceset_zipcode_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    drop _m
    keep zipcode year tri week type_id_2_3*
    order zipcode year tri week
    sort zipcode year tri week
save $pathname\type_id_2_3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace   

   

 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Create Files Ready for Matlab
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\type_id_2_3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\type_id_2_3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\choiceset_identifier_trimester_week_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\choiceset_identifier_trimester_week_zipcode_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\price_mean_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\price_mean_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\estar_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\estar_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\estarkwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\estarkwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\kwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id 
outsheet using $pathname\kwh_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\elec_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id   
outsheet using $pathname\elec_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\elec_county_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id   
outsheet using $pathname\elec_county_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\rebate_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id 
outsheet using $pathname\rebate_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\rebate_utility_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id 
outsheet using $pathname\rebate_utility_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\rebate_cfa_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id 
outsheet using $pathname\rebate_cfa_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	sort zip datenum
	merge zip datenum using $pathname\tax_rate_zipday_046_2008_2012_struct_NoHd  
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\tax_rate_zipday_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname




use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	sort zip
	merge zip using $pathname\tax_rate_046_2008_2012_struct_NoHd  
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\tax_rate_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname




use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\type_id2_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\type_id2_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname

use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\type_id3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\type_id3_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\ice_sc_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\ice_sc_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\AV_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\AV_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\bchoice_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear
	drop if hd_id==.
outsheet using $pathname\bchoice_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\demo_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear
	drop if hd_id==.
outsheet using $pathname\demo_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\demo_cat_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear
	drop if hd_id==.
outsheet using $pathname\demo_cat_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\demo_cat_NA_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear
	drop if hd_id==.
outsheet using $pathname\demo_cat_NA_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\sizebought_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear
	order hd_id overall
	sort hd_id
outsheet using $pathname\sizebought_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname	


use $pathname\sizeoffer_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace
 	order pid_id overall
 	sort pid_id
outsheet using $pathname\sizeoffer_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname	


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//State FE
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use $censuspath\state_accro, clear
	sort state
	drop if state=="" | state=="AS" | state=="FM" | state=="GU" | state=="MP" | state=="PW" | state=="MH" 
	drop state_id
	sort state
	egen state_id=seq()
	sort state
save $censuspath\state_accro_tmp, replace


use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	collapse(sum) count,by(zipcode state)
	drop count
	sort zipcode
save $pathname\map_zip_to_state_`subsample'_`group_id_selected'_seed_`set_seed', replace

//use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
use $pathname\lcidemo_046_2008_2012_struct_v11_11022017_robustb_`subsample'_`group_id_selected'_seed_`set_seed', clear
	sort state
    merge state using $censuspath\state_accro_tmp
    tab _m
    keep if _m==3      
	keep hd_id state_id
    sort hd_id
save $pathname\state_id_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', replace
outsheet using $pathname\state_id_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname


use $pathname\rebate_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, clear  	
	sort zipcode 
    merge zipcode using $pathname\map_zip_to_state_`subsample'_`group_id_selected'_seed_`set_seed'
    tab _m
    keep if _m==3
    drop _m
    sort state
    merge state using $censuspath\state_accro_tmp
    tab _m
    //keep if _m==3 
    keep zipcode year tri week state_id 
    gen state_dum=1
    reshape wide  state_dum, i(year trimester week zip) j(state_id)
	mvencode state_dum*, mv(0)
    sort zipcode year tri week
save $pathname\state_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd, replace   

    
use $pathname\matching_indices_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed', clear 
	merge zipcode year tri week using $pathname\state_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'_NoHd
	tab _m
	keep if _m==3
	drop _m
	sort hd_id
outsheet using $pathname\state_046_2008_2012_v11022017_struct_`subsample'_`group_id_selected'_seed_`set_seed'.csv, replace comma noquote noname



